function [all_episodes]=simulation_exp(alpha,beta,T1,Ts,T2,Deltav2,vh,vl,mu_l,sigma_l,grid_middle,tick,grid_size,N)
%SIMULATION_EXP Runs a learning experiment once.
%
%The run consists of three consecutive phases:
%  - For T1 episodes the algorithms learn as in the baseline model: the asset
%    value is vl or vh with probability 0.5 each.
%  - For Ts "shock" episodes we change Delta v to Deltav2: the asset value is
%    (vl+vh)/2 - Deltav2/2 or (vl+vh)/2 + Deltav2/2 with probability 0.5 each,
%    i.e. only the spread changes, the mean value is kept.
%  - For T2 episodes we return to the baseline specification.
%
%Inputs:
%  alpha       weight put on the realized profit in the Q-value update
%              Q <- alpha*profit + (1-alpha)*Q.
%  beta        decay rate of the experimentation probability exp(-beta*t).
%  T1,Ts,T2    number of episodes before, during and after the shock.
%  Deltav2     difference between the high and low asset value during the shock.
%  vh,vl       high and low asset value outside the shock.
%  mu_l,sigma_l mean and standard deviation of the normal shock l added to the
%              asset value to obtain the investor's valuation vc.
%  grid_middle central price of the grid.
%  tick        distance between two consecutive prices of the grid.
%  grid_size   number of grid prices on each side of grid_middle
%              (the grid has 2*grid_size+1 prices).
%  N           number of AMMs.
%
%Output:
%  all_episodes, a (T1+Ts+T2)x(6*N+2) matrix with the values in all episodes 1
%  to T1+Ts+T2 of A_n (quoted price of AMM n), opt_n (greedy price of AMM n),
%  Profit_n (profit of AMM n), a_min (lowest quoted price), and the same
%  variables squared. Columns are ordered as follows:
%    1      .. N      : A_1 .. A_N
%    N+1    .. 2N     : opt_1 .. opt_N
%    2N+1   .. 3N     : Profit_1 .. Profit_N
%    3N+1             : a_min
%    3N+2   .. 6N+2   : squares of columns 1 .. 3N+1, in the same order
%
%In the case N=2 for instance the columns of all_episodes are:
%1: A_1, 2: A_2, 3: opt_1, 4: opt_2, 5: Profit_1, 6: Profit_2, 7: a_min,
%8: A_1^2, 9: A_2^2, 10: opt_1^2, 11: opt_2^2, 12: Profit_1^2,
%13: Profit_2^2, 14: a_min^2

%Quantities used repeatedly below.
T           = T1+Ts+T2;                               %total number of episodes
n_prices    = 2*grid_size+1;                          %number of prices on the grid
price_floor = grid_middle - grid_size*tick - tick;    %price index s maps to price_floor + s*tick

%Pre-allocate
all_episodes = zeros(T,6*N+2);

%Initialize the Q-matrix with random values drawn uniformly between 3 and 6.
%These are intended to be higher than the monopoly expected profit.
%NOTE(review): the bounds 3 and 6 are hard-coded; confirm they still exceed
%the monopoly expected profit when non-baseline parameters are used.
Q_n = 3+(6-3)*rand(n_prices,N);

%Generate the asset value in each episode: each of the two possible values
%has probability 0.5. During the shock only the spread changes; the values
%stay centred on the mean (vl+vh)/2 of the baseline values.
v_mean  = (vl+vh)/2;
v_pre   = randsample([vl, vh], T1, true);                                   %before the shock
v_shock = randsample([v_mean-Deltav2/2, v_mean+Deltav2/2], Ts, true);       %during the shock
v_post  = randsample([vl, vh], T2, true);                                   %after the shock

%(:) forces column vectors whatever orientation randsample returns, so
%phases of length zero or one concatenate safely.
v_tilde = [v_pre(:) ; v_shock(:) ; v_post(:)];

%Compute vc normally
l  = normrnd(mu_l,sigma_l,T,1);                       %vector of random draws of l in each episode
vc = v_tilde + l;                                     %vector of investor valuations in each episode

%Experimentation probability in each episode (T x 1).
epsilon = exp(-beta*(1:T)');

%T x N matrix with 1 in (t,n) if AMM n experiments at time t.
%Drawn in a single vectorized call on the client: a parfor loop over the few
%AMMs would draw from the workers' random streams (so seeding the client
%generator would not make a run reproducible) and costs more than it saves.
Experiment = binornd(1,repmat(epsilon,1,N));

%Loop over all episodes.
for t = 1:T
    %We compute the greedy price in episode t, for each AMM.
    %s holds the index of the price (from 1 to 2*grid_size+1) played by each AMM.
    s = zeros(1,N);
    for i=1:N
        %Indices of all the values corresponding to a maximum.
        %Note: finding all the maximizers is irrelevant if the Q-matrix has been
        %initialized with continuous random variables, but is important otherwise.
        maxvector = find(Q_n(:,i) == max(Q_n(:,i)));
        s(1,i)  = maxvector(randi([1 length(maxvector)],1,1)); %Randomize if there are multiple greedy prices

        %Convert the price index into the actual price and record the greedy
        %price of AMM i at time t.
        all_episodes(t,N+i) = price_floor + s(1,i)*tick;

        %Compute the actual price chosen by AMM i based on experimentation or exploitation.
        if Experiment(t,i) == 1
            %AMM i explores at this round: draw a uniformly random price index
            %and record the corresponding price as the price quoted by AMM i.
            s(1,i)  = randi([1 n_prices],1,1);
            all_episodes(t,i) = price_floor + s(1,i)*tick;
        else
            %AMM i exploits at this round: the greedy price is the quoted price.
            all_episodes(t,i) = all_episodes(t,N+i);
        end
    end

    %We compute the profit in episode t. The investor buys if vc is at least
    %the best price a_min, such that profit is (a_min - v_tilde) if [vc >= a_min],
    %and zero otherwise. If several AMMs set the same lowest price the profit
    %is split equally between them.
    a_min = min(all_episodes(t,1:N));                 %lowest quoted price
    all_episodes(t,3*N+1) = a_min;
    if a_min <= vc(t)                                 %check that the customer buys
        index = find(a_min == all_episodes(t,1:N));   %all AMMs that set a_min
        m = numel(index);                             %number of AMMs that set a_min
        %For all other AMMs, or if the customer doesn't buy, the profit
        %remains the pre-allocated value of zero.
        all_episodes(t,2*N+index) = (a_min - v_tilde(t)) / m;
    end

    %Update the Q-matrix for each AMM: only the q-value associated with the
    %price actually played is updated.
    for j=1:N
        Q_n(s(1,j),j) = alpha*all_episodes(t,2*N+j) + (1-alpha)*Q_n(s(1,j),j);
    end

end %ends the loop on episodes t

%Compute the squared values of the variables in all_episodes:
all_episodes(:,3*N+2:6*N+2) = all_episodes(:,1:3*N+1).^2;


